************************************************************************************************************************************************
************************ Using Rich Lists to Study the Super-Rich and Top Wealth Inequality: Insights from Switzerland *************************
************************************************************************************************************************************************

clear all
set more off
set scheme s1color  

**Author: Enea Baselgia and Isabel Z. Martinez
**Date: April, 4 2023

cd "$mypath/"
mkdir output

cd "$mypath/output/"
mkdir figures
mkdir tables

***************************************************************************************************************************************************
***** Descriptive Analysis
***************************************************************************************************************************************************

** Chapter 3.1: Family and Individuals **

**gender
*load data
cd "$mypath/"
use "2560_BILANZ-SwissRichListDataset_1989-2020_Data_v1.0.dta", clear

collapse (count) id_pers, by (year family female)

gen fam_obs = id_pers if family==1
gen female_obs = id_pers if family==0 & female==1
gen male_obs = id_pers if family==0 & female==0

collapse (mean) fam_obs (mean) male_obs (mean) female_obs, by (year)
gen total_obs = fam_obs + female_obs + male_obs

gen female_sh = female_obs / total_obs
gen male_sh = male_obs / total_obs
gen fam_sh = fam_obs / total_obs

label var female_sh "female"
label var male_sh "male"
label var fam_sh "families"

cd "$mypath/output/figures/"

graph bar (asis) fam_sh male_sh female_sh, over(year, label(angle(90) labsize(normalsize))) percentages stack bar(1, fcolor(black) lcolor(black)) ///
bar(2, fcolor(gs11) lcolor(black)) bar(3, fcolor(white) lcolor(black))  ///
legend(row(1) size(normalsize)) ytitle("in % of all ranking entries", size(normalsize)) ylab(, labsize(normalsize) grid) 
graph export "gender_1.pdf", replace


*assigin a gender to family-obs: based on the gender of the primary originator of the family's net wealth
*load data
cd "$mypath/"
use "2560_BILANZ-SwissRichListDataset_1989-2020_Data_v1.0.dta", clear

tab family
tab female_2

gen female_joint =.
replace female_joint = female
replace female_joint = female_2 if female_joint==.
tab female_joint

drop if  female_joint==99 //  0.73% of all observations missings
gen male_obs = id_pers if female_joint==0
gen female_obs = id_pers if female_joint==1
gen jointly_obs = id_pers if female_joint==7

collapse (count) jointly_obs (count) male_obs (count) female_obs, by (year)
gen total_obs = jointly_obs + female_obs + male_obs

gen female_sh = female_obs / total_obs
gen male_sh = male_obs / total_obs
gen jointly_sh = jointly_obs / total_obs

label var female_sh "female"
label var male_sh "male"
label var jointly_sh "joint"

cd "$mypath/output/figures/"


graph bar (asis) male_sh female_sh jointly_sh, over(year, label(angle(90) labsize(normalsize))) percentages stack bar(1, fcolor(black) lcolor(black)) ///
bar(2, fcolor(gs11) lcolor(black)) bar(3, fcolor(white) lcolor(black))  ///
legend(row(1) size(normalsize)) ytitle("in % of all ranking entries", size(normalsize)) ylab(, labsize(normalsize) grid) 
graph export "gender_2.pdf", replace


**age
*load data
cd "$mypath/"
use "2560_BILANZ-SwissRichListDataset_1989-2020_Data_v1.0.dta", clear


/*drop if birth_date==""
drop if birth_date=="n/a"
gen birth_year_str = substr(birth_date,-4,.)
gen birth_year = real(substr(birth_year_str, 1, 4))
*/
gen age = year - birth_year

cd "$mypath/output/figures/"

collapse (mean) age, by (year)


tw  (connect age year if year>=1989, msymb(O) mcolor(black) msize() lcolor(black) lpattern())    ///
	, ytitle("average age of individual ranking entries", size(normalsize)) xtitle("") ///
	ylab(60(2)68, labsize(normalsize) grid gmin gmax) xlab(1990(5)2020, labsize(normalsize) grid) legend(row(1) size(normalsize)) 
	graph export "age.pdf", as(pdf) replace

	
	
** Chapter 3.2: Foreigners **

*load data
cd "$mypath/"
use "2560_BILANZ-SwissRichListDataset_1989-2020_Data_v1.0.dta", clear

preserve
sort foreignborn	
by foreignborn: sum wealth_R

collapse (count) num_rich=wealth_mean (sum) wealth_R , by (year foreignborn)	

egen swiss_born	=  mean(num_rich) if foreignborn==0, by(year)
egen foreign_born =  mean(num_rich) if foreignborn==1, by(year)
egen swissW	=  mean(wealth_R) if foreignborn==0, by(year)
egen foreignW = mean(wealth_R) if foreignborn==1, by(year)

collapse (sum) swiss_born (sum) foreign_born (sum) swissW (sum) foreignW, by (year)


gen totalW = foreignW + swissW
gen foreignW_sh = foreignW / totalW	

gen total_bilanz_pop = swiss_born + foreign_born
gen foreign_born_sh = foreign_born /total_bilanz_pop

keep year foreignW_sh foreign_born_sh


tempfile foreigh_shares
save "`foreigh_shares'"
restore
merge m:1 year using "`foreigh_shares'", nogen


collapse (mean) pop_firstgen_u15 pop_tot_u15 pop_swiss pop_foreigners tot_pop foreignW_sh foreign_born_sh, by(year)

gen foreigner_sh = pop_foreigners / tot_pop
gen foreignborn_sh = pop_firstgen_u15 / pop_tot_u15



label var foreignW_sh "foreign-born wealth share in total rich list"
label var foreign_born_sh "foreign-born population share in total rich list"
label var foreigner_sh "foreigners in total population"
label var foreignborn_sh "foreign-borns in total population"
 
//omit the year 1994 bc this has been a special issue with only 50 obs.
replace foreignW_sh=. if year==1994
replace foreign_born_sh=. if year==1994

cd "$mypath/output/figures/"

tw  (connect foreignW_sh year if year>=1989, msymb(O) mcolor(gs0) msize() lcolor(gs0) lpattern())    ///
	(connect foreign_born_sh year if year>=1989, msymb(D) mcolor(gs7) msize() lcolor(gs7) lpattern())    ///
	(connect foreignborn_sh year if year>=1989, msymb(T) mcolor(gs11) msize() lcolor(gs11) lpattern())    ///
	(connect foreigner_sh year if year>=1989, msymb(Sh) mcolor(gs0) msize() lcolor(gs0) lpattern())    ///
	, xtitle("") xline(1993, lpattern(longdash) lcolor(darkgray))  ///
	ylab(0.1 "10%" 0.2 "20%" 0.3 "30%" 0.4 "40%" 0.5 "50%" 0.6 "60%" 0.7 "70%", labsize(normalsize) grid ang(h)) xlab(1990(5)2020, grid) legend(row(4) size(medsmall)) 
	
	
graph export "foreigners.pdf", as(pdf) replace


*** foreign-borns by birth-country
*load data
cd "$mypath/"
use "2560_BILANZ-SwissRichListDataset_1989-2020_Data_v1.0.dta", clear

drop if foreignborn==0

collapse (first) fb_country_name (mean) fb_country_nr (first) fb_country_short, by (id_pers)
collapse (count) id_pers (first) fb_country_name (first) fb_country_short, by (fb_country_nr)

rename id_pers num_foreignborns

sort num_foreignborns

gen other_country=1 if num_foreignborns < 7
replace other_country=0 if other_country==.
replace fb_country_name="other" if other_country==1
sum num_foreignborns if  other_country==1

/*
    Variable |        Obs        Mean    Std. dev.       Min        Max
-------------+---------------------------------------------------------
num_foreig~s |         39    2.487179    1.636295          1          6
*/

bysort other_country: egen total_num_foreignborns = total(num_foreignborns) if other_country==1

replace fb_country_nr=0 if fb_country_nr==8228
replace num_foreignborns=total_num_foreignborns if fb_country_nr==0
replace fb_country_short="OTH" if fb_country_nr==0
replace other_country=0 if fb_country_nr==0

drop if other_country==1
keep num_foreignborns fb_country_name
order fb_country_name num_foreignborns
sort num_foreignborns

egen tot_num_foreignborns = total(num_foreignborns)
gen sh_num_foreignborns = num_foreignborns / tot_num_foreignborns
drop tot_num_foreignborns

cd "$mypath/output/tables/"
export delimited using "tab_b3.csv", replace


	
** Chapter 3.3: Top Managers **
*load data
cd "$mypath/"
use "2560_BILANZ-SwissRichListDataset_1989-2020_Data_v1.0.dta", clear


collapse (sum) wealth_R (count) id_pers, by (year manager)

*wealth
egen totalW = sum(wealth_R), by(year)
egen managerW = mean(wealth_R) if manager==1, by(year)
egen non_managerW = mean(wealth_R) if manager==0, by(year)
gen managerW_sh = managerW / totalW
gen non_managerW_sh = non_managerW / totalW

*obs
egen total_pers = sum(id_pers), by(year)
egen manager_pers = mean(id_pers) if manager==1, by(year)
egen non_manager_pers = mean(id_pers) if manager==0, by(year)
gen manager_pers_sh = manager_pers / total_pers
gen non_manager_pers_sh = non_manager_pers / total_pers


collapse (mean) managerW_sh (mean) non_managerW_sh (mean) manager_pers_sh (mean) non_manager_pers_sh, by (year)

replace managerW_sh=0 if managerW_sh==.
replace manager_pers_sh=0 if manager_pers_sh==.

gen W_control = managerW_sh + non_managerW_sh -1
gen pers_control = manager_pers_sh + non_manager_pers_sh -1
drop W_control pers_control



label var managerW_sh "wealth"
label var manager_pers_sh "observations"

cd "$mypath/output/figures/"	
	
tw (connect manager_pers_sh year if year >=1989, msymb(D) mcolor(gs7) msize() lcolor(gs7) lpattern() )    ///
(connect managerW_sh year if year >=1989, msymb(O) mcolor(gs0)  msize() lcolor(gs0) lpattern())  ///
	, ytitle("share of managers in total", size(normalsize)) xtitle("") ///
	ylab(0 "" 0.01 "1%" 0.02 "2%" 0.03 "3%" 0.04 "4%" 0.05 "5%" 0.06 "6%" 0.07 "7%" 0.08 "8%" 0.09 "9%", labsize(normalsize) grid) xlab(1990(5)2020, grid) legend(row(1) size(normalsize))	
	
graph export "managers.pdf", as(pdf) replace
	
	
	

** Chapter 3.4: Industry Composition **
*load data
cd "$mypath/"
use "2560_BILANZ-SwissRichListDataset_1989-2020_Data_v1.0.dta", clear

*** share of wealth by industry
collapse (sum) wealth_R, by (year industry_main)
egen totalW = sum(wealth_R), by(year)

forvalues i = 1/26 {
egen ind_`i'_W = mean(wealth_R) if industry_main==`i', by(year)
gen ind_`i'_sh = ind_`i'_W / totalW
}


collapse ind_1_sh ind_2_sh ind_3_sh ind_4_sh ind_5_sh ind_6_sh ind_7_sh ind_8_sh ind_9_sh ///
ind_10_sh ind_11_sh ind_12_sh ind_13_sh ind_14_sh ind_15_sh ind_16_sh ind_17_sh ind_18_sh ///
ind_19_sh ind_20_sh ind_21_sh ind_22_sh ind_23_sh ind_24_sh ind_25_sh ind_26_sh, by(year)

sum ind_1_sh ind_2_sh ind_3_sh ind_4_sh ind_5_sh ind_6_sh ind_7_sh ind_8_sh ind_9_sh ///
ind_10_sh ind_11_sh ind_12_sh ind_13_sh ind_14_sh ind_15_sh ind_16_sh ind_17_sh ind_18_sh ///
ind_19_sh ind_20_sh ind_21_sh ind_22_sh ind_23_sh ind_24_sh ind_25_sh ind_26_sh



** --> we aggregate all industries with a share of total BILANZ wealth below 3%

*load data
cd "$mypath/"
use "2560_BILANZ-SwissRichListDataset_1989-2020_Data_v1.0.dta", clear

* aggregate industries
gen indusry_new = industry_main	
*1 pharmaceuticals; chemistry; biotechnology; synthetics; fertilizers
*2 trade; retail
replace indusry_new=26 if indusry_new==3 	
*4 shareholdings; investments
replace indusry_new=26 if indusry_new==5 	
*6 industry; manufacturing
*7 food, drinks and tobacco industry 
*8 banking; insurance; finance industry
replace indusry_new=26 if indusry_new==9
replace indusry_new=26 if indusry_new==10
replace indusry_new=6 if indusry_new==11
replace indusry_new=26 if indusry_new==12
replace indusry_new=4 if indusry_new==13
*14	watches; jewelry; luxury goods
*15	athletes---> new entertainment
replace indusry_new=15 if indusry_new==16
replace indusry_new=26 if indusry_new==17
replace indusry_new=15 if indusry_new==18
replace indusry_new=6 if indusry_new==19
replace indusry_new=26 if indusry_new==20
replace indusry_new=14 if indusry_new==21
* 22 fashion and textile industry
replace indusry_new=26 if indusry_new==23
* 24 shipping; transportation; distribution; logistics
replace indusry_new=26 if indusry_new==25
* 26 other 

tab indusry_new

collapse (sum) wealth_R, by (year indusry_new)
egen totalW = sum(wealth_R), by(year)	

forvalues i = 1/26 {
egen ind_`i'_W = mean(wealth_R) if indusry_new==`i', by(year)
gen ind_`i'_sh = ind_`i'_W / totalW
replace ind_`i'_sh=. if year==1994
}

collapse ind_1_sh ind_2_sh ind_3_sh ind_4_sh ind_5_sh ind_6_sh ind_7_sh ind_8_sh ind_9_sh ///
ind_10_sh ind_11_sh ind_12_sh ind_13_sh ind_14_sh ind_15_sh ind_16_sh ind_17_sh ind_18_sh ///
ind_19_sh ind_20_sh ind_21_sh ind_22_sh ind_23_sh ind_24_sh ind_25_sh ind_26_sh, by(year)

keep year ind_1_sh ind_2_sh ind_4_sh  ind_6_sh ind_7_sh ind_8_sh ind_14_sh ind_15_sh ind_22_sh ind_24_sh ind_26_sh

replace ind_15_sh=0 if ind_15_sh==. & year!=1994

*1994 was a special issue with only 50 obs. To avoid distorting the figure due to this much smaller sample size, we replace 1994 with the average from 1993 and 1995
foreach var in ind_1_sh ind_2_sh ind_4_sh ind_6_sh ind_7_sh ind_8_sh ind_14_sh ind_15_sh ind_22_sh ind_24_sh ind_26_sh {
replace `var'	= ((`var'[_n-1] + `var'[_n+1]) /2) if year==1994
}

*** label variables for figures
label var ind_1_sh "pharmaceuticals, chemistry, biotechnology"
label var ind_2_sh "trade, retail"
label var ind_4_sh "shareholdings, investments (incl. real estate)"
label var ind_6_sh "industry, manufacturing"
label var ind_7_sh "food, drinks and tobacco industry"
label var ind_8_sh "banking, insurance, finance industry"
label var ind_14_sh "luxury goods (incl. watches, jewelry, perfumes, cosmetics)"
label var ind_15_sh "sports and entertainment"
label var ind_22_sh "fashion and textile industry"
label var ind_24_sh "shipping, transportation, logistics"
label var ind_26_sh "other"

gen sh1 = ind_26_sh + ind_24_sh + ind_22_sh + ind_15_sh + ind_14_sh + ind_8_sh + ind_7_sh + ind_6_sh + ind_4_sh + ind_2_sh + ind_1_sh
gen sh2 = ind_26_sh + ind_24_sh + ind_22_sh + ind_15_sh + ind_14_sh + ind_8_sh + ind_7_sh + ind_6_sh + ind_4_sh + ind_2_sh
gen sh3 = ind_26_sh + ind_24_sh + ind_22_sh + ind_15_sh + ind_14_sh + ind_8_sh + ind_7_sh + ind_6_sh + ind_4_sh 
gen sh4 = ind_26_sh + ind_24_sh + ind_22_sh + ind_15_sh + ind_14_sh + ind_8_sh + ind_7_sh + ind_6_sh 
gen sh5 = ind_26_sh + ind_24_sh + ind_22_sh + ind_15_sh + ind_14_sh + ind_8_sh + ind_7_sh 
gen sh6 = ind_26_sh + ind_24_sh + ind_22_sh + ind_15_sh + ind_14_sh + ind_8_sh 
gen sh7 = ind_26_sh + ind_24_sh + ind_22_sh + ind_15_sh + ind_14_sh 
gen sh8 = ind_26_sh + ind_24_sh + ind_22_sh + ind_15_sh 
gen sh9 = ind_26_sh + ind_24_sh + ind_22_sh 
gen sh10 = ind_26_sh + ind_24_sh 
gen sh11 = ind_26_sh 

label var sh1 "pharmaceuticals, chemistry, biotechnology"
label var sh2 "trade, retail"
label var sh3 "shareholdings, investments (incl. real estate)"
label var sh4 "industry, manufacturing"
label var sh5 "food, drinks and tobacco industry"
label var sh6 "banking, insurance, finance industry"
label var sh7 "luxury goods (incl. watches, jewelry, cosmetics)"
label var sh8 "sports and entertainment"
label var sh9 "fashion and textile industry"
label var sh10 "shipping, transportation, logistics"
label var sh11 "other"



 
*Demonstration of -parea- capabilities.
ssc install twoway_parea, replace
 /*
Demonstration of -parea- capabilities.
Draw 10 patterned rectangles in a 2 by 5 formation

by Sergiy Radyakin 12. Aug, 2007



#delimit ;

clear;
set obs 60;
gen x=. ;
gen y=. ;
gen p=. ;

forvalues i=1/10 { ;
  local sh_y=0 ;
  if `i'>5 local sh_y=1 ;
  quietly replace x=`i'-.2-`sh_y'*5  in `=`i'*6-5' ;
  quietly replace x=`i'-1 -`sh_y'*5  in `=`i'*6-4' ;
  quietly replace x=`i'-1 -`sh_y'*5  in `=`i'*6-3' ;
  quietly replace x=`i'-.2-`sh_y'*5  in `=`i'*6-2' ;
  quietly replace x=`i'-.2-`sh_y'*5  in `=`i'*6-1' ;

  quietly replace y=`sh_y'*0.55      in `=`i'*6-5' ;
  quietly replace y=`sh_y'*0.55      in `=`i'*6-4' ;
  quietly replace y=`sh_y'*0.55+.5   in `=`i'*6-3' ;
  quietly replace y=`sh_y'*0.55+.5   in `=`i'*6-2' ;
  quietly replace y=`sh_y'*0.55      in `=`i'*6-1' ;  

  quietly replace p=`i' in `=`i'*6-5'/`=`i'*6-1'   ;
  local labl `"`labl' label(`i' "pattern`i'")"'    ;
} ;

local GRAPHCMD="twoway " ;
forvalues i=1/10 { ;
  local GRAPHCMD `"`GRAPHCMD' parea y x if p==`i',lc(black) fc(black)
                           pattern(pattern`i') fi(100) nodropb `=cond(`i'<10,"||","")'"' ;
};

local GRAPHCMD `"`GRAPHCMD' graphregion(color(white) margin(zero))
                            plotregion(style(none) color(white))
                            xlabel(none) xscale(off r(0 4.5)) 
                            ylabel(none) yscale(off r(0 1.05))
			    legend(cols(5) `labl') scale(0.8)
                            title("twoway parea y x,...") subtitle("Sergiy Radyakin") "'  ;

`GRAPHCMD' ;

*/


cd "$mypath/output/figures/"	

tw (parea sh1 year, color(gray) lcolor(gray) lwidth(thin) pattern(pattern1)) ///
(parea sh2 year, color(gray) lcolor(gray) lwidth(thin) pattern(pattern4))  ///
(parea sh3 year, color(gray) lcolor(gray) lwidth(thin) pattern(pattern9)) ///
(area sh4 year, color(white) lcolor(black) lwidth(thin) ) ///
(parea sh5 year, color(black) lcolor(black) lwidth(thin) pattern(pattern1)) ///
(parea sh6 year, color(black) lcolor(black) lwidth(thin) pattern(pattern4)) ///
(parea sh7 year , color(black) lcolor(black) lwidth(thin) pattern(pattern9)) ///
(area sh8 year , color(gs14) lcolor(black) lwidth(thin) ) ///
(area sh9 year , color(gs10) lcolor(black) lwidth(thin) ) ///
(area sh10 year , color(gs5) lcolor(black) lwidth(thin) ) ///	
(area sh11 year , color(black) lcolor(black)  ) ///		
, xlab(1990(5)2020, labsize(small))  xtitle("") legend(row(6) size(small)) ///
 ylab(0 "" 0.1 "10%" 0.2 "20%" 0.3 "30%" 0.4 "40%" 0.5 "50%" 0.6 "60%" 0.7 "70%" 0.8 "80%" 0.9 "90%" 1 "100%", labsize(vsmall) grid)

 graph display, ysize(12) xsize(20)

 graph export "industries.pdf", as(pdf) replace

 


***************************************************************************************************************************************************
***** Appendix
***************************************************************************************************************************************************
 
** Number of billionaires **
*load data
cd "$mypath/"
use "2560_BILANZ-SwissRichListDataset_1989-2020_Data_v1.0.dta", clear


sort year
by year: sum wealth_R
keep if wealth_R >= 1000 
collapse (count) id_pers, by(year)
label var id_pers "billionaires"

cd "$mypath/output/figures/"
drop if year==1990 | year==1992 | year==1994 | year==1996 | year==1998 | year==2000 | year==2002 | year==2004 | year==2006 | year==2008 | year==2010 | year==2012 | year==2014 | year==2016 | year==2018 

graph bar (asis) id_pers, over(year, label(angle(90) labsize(small))) bar(1, fcolor(black) lcolor(black))  ///
blabel(total, position(inside) size(vsmall) color(white)) legend(row(1)) ytitle("") ylab(, labsize(small) grid) 
graph export "billionaires.pdf", as(pdf) replace
 

 
 
 
 

** Table Summary Statistics **	
*load data
cd "$mypath/"
use "2560_BILANZ-SwissRichListDataset_1989-2020_Data_v1.0.dta", clear

sort year 
cd "$mypath/output/tables/"

ssc install estout, replace
estimates clear
eststo: estpost tabstat n_magazine, by(year) stat(mean)
eststo: estpost tabstat id_pers, by(year) stat(count)
eststo: estpost tabstat id_pers if family==1, by(year) stat(count)
eststo: estpost tabstat id_pers if family==0 & female==0, by(year) stat(count)
eststo: estpost tabstat id_pers if family==0 & female==1, by(year) stat(count)
eststo: estpost tabstat wealth_R, by(year) stat(mean)
eststo: estpost tabstat wealth_R, by(year) stat(p50)
eststo: estpost tabstat wealth_R, by(year) stat(sd)
eststo: estpost tabstat wealth_R if family==1, by(year) stat(mean)
eststo: estpost tabstat wealth_R if family==0, by(year) stat(mean)

* create a summary statistics table
esttab using tab_b1.csv, cells(year mean p50 sd count) replace


estimates clear
* mean and median real wealth period 1999-2020
eststo: estpost tabstat wealth_R if year>1998, by(year) stat(mean)
// 1712.556 
eststo: estpost tabstat wealth_R if year>1998, by(year) stat(median)
// 643.6519 

estimates clear

** wealth distribution of the dataset **
eststo: estpost tabstat wealth_R if year>1998, by(year) stat(p10 p25 p50 p75 p90 p99)
esttab using tab_b2.csv, cells(year p10 p25 p50 p75 p90 p99) replace

estimates clear
by id_pers, sort: gen id_pers_count = _n == 1
tabstat id_pers_count if id_pers_count==1, stat(count)
///898

by id_fam, sort: gen id_fam_count = _n == 1
tabstat id_fam_count if id_fam_count==1, stat(count)
/// 713
 
 
 
 
 
 